# Directory holding the input data, relative to the directory make runs in.
DATA_ROOT := ../dataset
# Directory into which the rules below write their outputs.
TMP_ROOT := ./tmp
# Input CSV paths under DATA_ROOT; the test files sit in the test/ subdirectory.
train_enrollment_csv := $(DATA_ROOT)/enrollment_train.csv
test_enrollment_csv := $(DATA_ROOT)/test/enrollment_test.csv
train_log_csv := $(DATA_ROOT)/log_train.csv
test_log_csv := $(DATA_ROOT)/test/log_test.csv

# Run split_n_parts.py on the training enrollment CSV.  The last argument is
# the $(TMP_ROOT)/train_part_enrollment_id prefix; the train* rules later read
# back every file matching $(TMP_ROOT)/train_part_enrollment_id*.
# The 10 is presumably the number of parts -- confirm in split_n_parts.py.
# Declared phony: the recipe never creates a file named after the target, so a
# stray file called "train_part_enrollment_id" must not silence the rule.
.PHONY: train_part_enrollment_id
train_part_enrollment_id:
	./split_n_parts.py $(train_enrollment_csv) 10 $(TMP_ROOT)/train_part_enrollment_id

# train1: build the first merged training set at $(TMP_ROOT)/train1.
# The enrollment-id part files are concatenated onto merge_feature.py's stdin;
# the quoted argument is a space-separated list of files (truth_train.csv
# first, then two feature files) and the last argument is the output path.
# (Argument meanings are inferred from this call -- confirm in merge_feature.py.)
# merge_feature.py is a prerequisite of the file rule rather than of the
# recipe-less alias, so that editing the script actually triggers a rebuild.
.PHONY: train1
train1: $(TMP_ROOT)/train1
$(TMP_ROOT)/train1: ./merge_feature.py
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py \
	"$(DATA_ROOT)/truth_train.csv \
	usercourse/tmp/train_user_course_action_distance_describe \
	usercourse/tmp/train_user_enter_course_hour_latest.feas" \
	$@
# test1: build the first merged test set at $(TMP_ROOT)/test1.
# Same call shape as train1, but stdin comes from $(TMP_ROOT)/test_enrollment_id
# and the input list has no truth file.
# merge_feature.py is a prerequisite of the file rule rather than of the
# recipe-less alias, so that editing the script actually triggers a rebuild.
.PHONY: test1
test1: $(TMP_ROOT)/test1
$(TMP_ROOT)/test1: ./merge_feature.py
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py \
	"usercourse/tmp/test_user_course_action_distance_describe \
	usercourse/tmp/test_user_enter_course_hour_latest.feas" \
	$@
# Run rand_sample_trainset.py on the merged train1 file, passing
# $(TMP_ROOT)/train1.parts as the output argument.  The 4 is presumably the
# number of sampled parts (train1.fm.file reads train1.parts.0) -- confirm in
# rand_sample_trainset.py.
# Declared phony: no file literally named "train1.parts" is the rule's product.
.PHONY: train1.parts
train1.parts: train1 test1
	./rand_sample_trainset.py $(TMP_ROOT)/train1 4 $(TMP_ROOT)/train1.parts

# train2: build the second merged training set at $(TMP_ROOT)/train2.
# train2.fm.file lists $(TMP_ROOT)/train2 as a prerequisite, so the recipe is
# attached to that file name (otherwise make reports "No rule to make target"
# on a clean tree); "train2" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B train2`.
# NOTE(review): train_user_enter_course_hour_latest.feas appears twice in the
# list; kept as is because removing it could change the merged column layout.
.PHONY: train2
train2: $(TMP_ROOT)/train2
$(TMP_ROOT)/train2:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py \
	"$(DATA_ROOT)/truth_train.csv \
	usercourse/tmp/train_user_course_action_distance_describe \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_course_action_latest_count.10 \
	usercourse/tmp/train_user_course_action_latest_count.5 \
	usercourse/tmp/train_user_enter_course_day.feas \
	usercourse/tmp/train_user_enter_course_day_latest.feas \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_latest_two_action_distance" \
	$@

# test2: build the second merged test set at $(TMP_ROOT)/test2.
# train2.fm.file lists $(TMP_ROOT)/test2 as a prerequisite, so the recipe is
# attached to that file name (otherwise make reports "No rule to make target"
# on a clean tree); "test2" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B test2`.
# NOTE(review): test_user_enter_course_hour_latest.feas appears twice in the
# list; kept as is because removing it could change the merged column layout.
.PHONY: test2
test2: $(TMP_ROOT)/test2
$(TMP_ROOT)/test2:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py \
	"usercourse/tmp/test_user_course_action_distance_describe \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_course_action_latest_count.10 \
	usercourse/tmp/test_user_course_action_latest_count.5 \
	usercourse/tmp/test_user_enter_course_day.feas \
	usercourse/tmp/test_user_enter_course_day_latest.feas \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_latest_two_action_distance" \
	$@
# Run rand_sample_trainset.py on the merged train2 file, passing
# $(TMP_ROOT)/train2.parts as the output argument.  The 4 is presumably the
# number of sampled parts (train2.fm.file reads train2.parts.0) -- confirm in
# rand_sample_trainset.py.
# Declared phony: no file literally named "train2.parts" is the rule's product.
.PHONY: train2.parts
train2.parts: train2 test2
	./rand_sample_trainset.py $(TMP_ROOT)/train2 4 $(TMP_ROOT)/train2.parts
# Run rand_sample_trainset.py on the merged train4 file, passing
# $(TMP_ROOT)/train4.parts as the output argument (train4.parts.fm.file reads
# train4.parts.0 .. train4.parts.3).
# Prerequisites are the train4/test4 targets, matching train1.parts and
# train2.parts; the previous $(TMP_ROOT)/train4 and $(TMP_ROOT)/test4 names had
# no rule of their own, so a clean tree failed with "No rule to make target".
# Declared phony: no file literally named "train4.parts" is the rule's product.
.PHONY: train4.parts
train4.parts: train4 test4
	./rand_sample_trainset.py $(TMP_ROOT)/train4 4 $(TMP_ROOT)/train4.parts

# Add the user_course event ratio feature.
# train3: the train2 input list plus train_user_course_action_ratio_of_all,
# merged into $(TMP_ROOT)/train3.
# train3.fm.file lists $(TMP_ROOT)/train3 as a prerequisite, so the recipe is
# attached to that file name (otherwise make reports "No rule to make target"
# on a clean tree); "train3" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B train3`.
.PHONY: train3
train3: $(TMP_ROOT)/train3
$(TMP_ROOT)/train3:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py \
	"$(DATA_ROOT)/truth_train.csv \
	usercourse/tmp/train_user_course_action_distance_describe \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_course_action_latest_count.10 \
	usercourse/tmp/train_user_course_action_latest_count.5 \
	usercourse/tmp/train_user_enter_course_day.feas \
	usercourse/tmp/train_user_enter_course_day_latest.feas \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_latest_two_action_distance \
	usercourse/tmp/train_user_course_action_ratio_of_all" \
	$@
# test3: the test2 input list plus test_user_course_action_ratio_of_all,
# merged into $(TMP_ROOT)/test3.
# train3.fm.file lists $(TMP_ROOT)/test3 as a prerequisite, so the recipe is
# attached to that file name (otherwise make reports "No rule to make target"
# on a clean tree); "test3" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B test3`.
# NOTE(review): the quoted list ends with a space before the closing quote,
# unlike train3; kept byte-for-byte since merge_feature.py's parsing is not
# visible here.
.PHONY: test3
test3: $(TMP_ROOT)/test3
$(TMP_ROOT)/test3:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py \
	"usercourse/tmp/test_user_course_action_distance_describe \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_course_action_latest_count.10 \
	usercourse/tmp/test_user_course_action_latest_count.5 \
	usercourse/tmp/test_user_enter_course_day.feas \
	usercourse/tmp/test_user_enter_course_day_latest.feas \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_latest_two_action_distance \
	usercourse/tmp/test_user_course_action_ratio_of_all " \
	$@

# Test the effectiveness of a single feature family on its own.
# Inputs handed to merge_feature.py for train0: truth_train.csv plus the three
# action-ratio feature files.  Recursive (=) so that $(DATA_ROOT) is expanded
# when the recipe runs, exactly as the inline text was.
train0_merge_inputs = \
  $(DATA_ROOT)/truth_train.csv \
  usercourse/tmp/train_user_course_action_ratio_of_all \
  usercourse/tmp/train_user_course_action_ratio_of_all.5 \
  usercourse/tmp/train_user_course_action_ratio_of_all.10

# Pipe every enrollment-id part file into merge_feature.py; the quoted list and
# the output path are its two arguments.
$(TMP_ROOT)/train0:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py "$(train0_merge_inputs)" $(TMP_ROOT)/train0
# Inputs handed to merge_feature.py for test0: the three test-side action-ratio
# feature files (no truth file).
test0_merge_inputs = \
  usercourse/tmp/test_user_course_action_ratio_of_all \
  usercourse/tmp/test_user_course_action_ratio_of_all.5 \
  usercourse/tmp/test_user_course_action_ratio_of_all.10

# Pipe the test enrollment ids into merge_feature.py; the quoted list and the
# output path are its two arguments.
$(TMP_ROOT)/test0:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py "$(test0_merge_inputs)" $(TMP_ROOT)/test0
# Run csv2libfm_format.py on the merged train0 and test0 files.
# The two trailing arguments differ only in the last one (1 for train, 0 for
# test); presumably a split ratio and a has-label flag -- confirm in
# csv2libfm_format.py.
# Declared phony: no file named "train0.fm.file" is ever created.
.PHONY: train0.fm.file
train0.fm.file: $(TMP_ROOT)/train0 $(TMP_ROOT)/test0
	./csv2libfm_format.py $(TMP_ROOT)/train0 0 1
	./csv2libfm_format.py $(TMP_ROOT)/test0 0 0



# GBDT performance got worse after adding ratio.2.
# train4: the train3 input list plus the ratio_of_all.5 and ratio_of_all.10
# feature files, merged into $(TMP_ROOT)/train4.
# train4.fm.file and train4.parts list $(TMP_ROOT)/train4 as a prerequisite, so
# the recipe is attached to that file name (otherwise make reports "No rule to
# make target" on a clean tree); "train4" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B train4`.
.PHONY: train4
train4: $(TMP_ROOT)/train4
$(TMP_ROOT)/train4:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py \
	"$(DATA_ROOT)/truth_train.csv \
	usercourse/tmp/train_user_course_action_distance_describe \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_course_action_latest_count.10 \
	usercourse/tmp/train_user_course_action_latest_count.5 \
	usercourse/tmp/train_user_enter_course_day.feas \
	usercourse/tmp/train_user_enter_course_day_latest.feas \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_latest_two_action_distance \
	usercourse/tmp/train_user_course_action_ratio_of_all \
	usercourse/tmp/train_user_course_action_ratio_of_all.5 \
	usercourse/tmp/train_user_course_action_ratio_of_all.10" \
	$@
# test4: the test3 input list plus the ratio_of_all.5 and ratio_of_all.10
# feature files, merged into $(TMP_ROOT)/test4.
# train4.fm.file and train4.parts list $(TMP_ROOT)/test4 as a prerequisite, so
# the recipe is attached to that file name (otherwise make reports "No rule to
# make target" on a clean tree); "test4" stays available as a phony alias.
# Once the file exists it is not regenerated: delete it or use `make -B test4`.
.PHONY: test4
test4: $(TMP_ROOT)/test4
$(TMP_ROOT)/test4:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py \
	"usercourse/tmp/test_user_course_action_distance_describe \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_course_action_latest_count.10 \
	usercourse/tmp/test_user_course_action_latest_count.5 \
	usercourse/tmp/test_user_enter_course_day.feas \
	usercourse/tmp/test_user_enter_course_day_latest.feas \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_latest_two_action_distance \
	usercourse/tmp/test_user_course_action_ratio_of_all \
	usercourse/tmp/test_user_course_action_ratio_of_all.5 \
	usercourse/tmp/test_user_course_action_ratio_of_all.10" \
	$@

# Add the user's per-weekday event count features.
# GBDT performance dropped.
# Inputs handed to merge_feature.py for train5: the train4 list plus the
# weekday overall-ratio file.  Recursive (=) so that $(DATA_ROOT) is expanded
# when the recipe runs, exactly as the inline text was.
train5_merge_inputs = \
  $(DATA_ROOT)/truth_train.csv \
  usercourse/tmp/train_user_course_action_distance_describe \
  usercourse/tmp/train_user_enter_course_hour_latest.feas \
  usercourse/tmp/train_user_course_action_latest_count.10 \
  usercourse/tmp/train_user_course_action_latest_count.5 \
  usercourse/tmp/train_user_enter_course_day.feas \
  usercourse/tmp/train_user_enter_course_day_latest.feas \
  usercourse/tmp/train_user_enter_course_hour_latest.feas \
  usercourse/tmp/train_user_latest_two_action_distance \
  usercourse/tmp/train_user_course_action_ratio_of_all \
  usercourse/tmp/train_user_course_action_ratio_of_all.5 \
  usercourse/tmp/train_user_course_action_ratio_of_all.10 \
  usercourse/tmp/train.user_course_action_weekday.overall.ratio

# The space before the closing quote reproduces the original argument exactly.
$(TMP_ROOT)/train5:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py "$(train5_merge_inputs) " $(TMP_ROOT)/train5
# Inputs handed to merge_feature.py for test5: the test4 list plus the
# test-side weekday overall-ratio file.
test5_merge_inputs = \
  usercourse/tmp/test_user_course_action_distance_describe \
  usercourse/tmp/test_user_enter_course_hour_latest.feas \
  usercourse/tmp/test_user_course_action_latest_count.10 \
  usercourse/tmp/test_user_course_action_latest_count.5 \
  usercourse/tmp/test_user_enter_course_day.feas \
  usercourse/tmp/test_user_enter_course_day_latest.feas \
  usercourse/tmp/test_user_enter_course_hour_latest.feas \
  usercourse/tmp/test_user_latest_two_action_distance \
  usercourse/tmp/test_user_course_action_ratio_of_all \
  usercourse/tmp/test_user_course_action_ratio_of_all.5 \
  usercourse/tmp/test_user_course_action_ratio_of_all.10 \
  usercourse/tmp/test.user_course_action_weekday.overall.ratio

# The space before the closing quote reproduces the original argument exactly.
$(TMP_ROOT)/test5:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py "$(test5_merge_inputs) " $(TMP_ROOT)/test5

# Inputs handed to merge_feature.py for train6: the train4 list plus
# train_user_course_action_time.event_ratio.  Recursive (=) so that
# $(DATA_ROOT) is expanded when the recipe runs, exactly as the inline text was.
train6_merge_inputs = \
  $(DATA_ROOT)/truth_train.csv \
  usercourse/tmp/train_user_course_action_distance_describe \
  usercourse/tmp/train_user_enter_course_hour_latest.feas \
  usercourse/tmp/train_user_course_action_latest_count.10 \
  usercourse/tmp/train_user_course_action_latest_count.5 \
  usercourse/tmp/train_user_enter_course_day.feas \
  usercourse/tmp/train_user_enter_course_day_latest.feas \
  usercourse/tmp/train_user_enter_course_hour_latest.feas \
  usercourse/tmp/train_user_latest_two_action_distance \
  usercourse/tmp/train_user_course_action_ratio_of_all \
  usercourse/tmp/train_user_course_action_ratio_of_all.5 \
  usercourse/tmp/train_user_course_action_ratio_of_all.10 \
  usercourse/tmp/train_user_course_action_time.event_ratio

# The space before the closing quote reproduces the original argument exactly.
$(TMP_ROOT)/train6:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py "$(train6_merge_inputs) " $(TMP_ROOT)/train6
# Inputs handed to merge_feature.py for test6: the test4 list plus
# test_user_course_action_time.event_ratio.
test6_merge_inputs = \
  usercourse/tmp/test_user_course_action_distance_describe \
  usercourse/tmp/test_user_enter_course_hour_latest.feas \
  usercourse/tmp/test_user_course_action_latest_count.10 \
  usercourse/tmp/test_user_course_action_latest_count.5 \
  usercourse/tmp/test_user_enter_course_day.feas \
  usercourse/tmp/test_user_enter_course_day_latest.feas \
  usercourse/tmp/test_user_enter_course_hour_latest.feas \
  usercourse/tmp/test_user_latest_two_action_distance \
  usercourse/tmp/test_user_course_action_ratio_of_all \
  usercourse/tmp/test_user_course_action_ratio_of_all.5 \
  usercourse/tmp/test_user_course_action_ratio_of_all.10 \
  usercourse/tmp/test_user_course_action_time.event_ratio

# The space before the closing quote reproduces the original argument exactly.
$(TMP_ROOT)/test6:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature.py "$(test6_merge_inputs) " $(TMP_ROOT)/test6



# Keep the original label information.
# train4.csv: same input list as train4, but merged with
# merge_feature_to_csv.py into $(TMP_ROOT)/train4.csv.
# Declared phony: the output is written under $(TMP_ROOT), so no file named
# "train4.csv" appears next to the Makefile and the rule must always run.
.PHONY: train4.csv
train4.csv:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature_to_csv.py \
	"$(DATA_ROOT)/truth_train.csv \
	usercourse/tmp/train_user_course_action_distance_describe \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_course_action_latest_count.10 \
	usercourse/tmp/train_user_course_action_latest_count.5 \
	usercourse/tmp/train_user_enter_course_day.feas \
	usercourse/tmp/train_user_enter_course_day_latest.feas \
	usercourse/tmp/train_user_enter_course_hour_latest.feas \
	usercourse/tmp/train_user_latest_two_action_distance \
	usercourse/tmp/train_user_course_action_ratio_of_all \
	usercourse/tmp/train_user_course_action_ratio_of_all.5 \
	usercourse/tmp/train_user_course_action_ratio_of_all.10" \
	$(TMP_ROOT)/train4.csv
# test4.csv: same input list as test4, but merged with
# merge_feature_to_csv.py into $(TMP_ROOT)/test4.csv.
# Declared phony: the output is written under $(TMP_ROOT), so no file named
# "test4.csv" appears next to the Makefile and the rule must always run.
.PHONY: test4.csv
test4.csv:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature_to_csv.py \
	"usercourse/tmp/test_user_course_action_distance_describe \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_course_action_latest_count.10 \
	usercourse/tmp/test_user_course_action_latest_count.5 \
	usercourse/tmp/test_user_enter_course_day.feas \
	usercourse/tmp/test_user_enter_course_day_latest.feas \
	usercourse/tmp/test_user_enter_course_hour_latest.feas \
	usercourse/tmp/test_user_latest_two_action_distance \
	usercourse/tmp/test_user_course_action_ratio_of_all \
	usercourse/tmp/test_user_course_action_ratio_of_all.5 \
	usercourse/tmp/test_user_course_action_ratio_of_all.10" \
	$(TMP_ROOT)/test4.csv

# train8.csv: merge truth_train.csv with ../feature/tmp/train4.total.csv into
# $(TMP_ROOT)/train8.csv.
# NOTE(review): this rule calls merge_feature.py while test8.csv, train4.csv
# and test4.csv call merge_feature_to_csv.py, and the input is named
# train4.total.csv where test8.csv reads test.total.csv -- confirm both are
# intended.  The quoted list is kept byte-for-byte, trailing spaces included.
# Declared phony: the output is written under $(TMP_ROOT), so no file named
# "train8.csv" appears next to the Makefile and the rule must always run.
.PHONY: train8.csv
train8.csv:
	cat $(TMP_ROOT)/train_part_enrollment_id* | \
	./merge_feature.py \
	"$(DATA_ROOT)/truth_train.csv \
	../feature/tmp/train4.total.csv  \
	" \
	$(TMP_ROOT)/train8.csv

# test8.csv: run merge_feature_to_csv.py with ../feature/tmp/test.total.csv as
# the only listed input, writing $(TMP_ROOT)/test8.csv.
# Declared phony: the output is written under $(TMP_ROOT), so no file named
# "test8.csv" appears next to the Makefile and the rule must always run.
.PHONY: test8.csv
test8.csv:
	cat $(TMP_ROOT)/test_enrollment_id | \
	./merge_feature_to_csv.py \
	"../feature/tmp/test.total.csv" \
	$(TMP_ROOT)/test8.csv

# test4.fm.file is currently the most stable feature set.

# Run csv2libfm_format.py on $(TMP_ROOT)/train1.parts.0 with arguments 0.2 1.
# NOTE(review): train1.parts.0 is not a declared prerequisite (only
# $(TMP_ROOT)/train1 is); it is presumably produced by the train1.parts rule,
# which therefore has to be run first.
# Declared phony: no file named "train1.fm.file" is ever created.
.PHONY: train1.fm.file
train1.fm.file: $(TMP_ROOT)/train1
	./csv2libfm_format.py $(TMP_ROOT)/train1.parts.0 0.2 1
# Run csv2libfm_format.py three times: on train2.parts.0 (0.2 1), on test2
# (0.0 0) and on the full train2 file (0.3 1).
# NOTE(review): train2.parts.0 is not a declared prerequisite; it is presumably
# produced by the train2.parts rule, which therefore has to be run first.
# Declared phony: no file named "train2.fm.file" is ever created.
.PHONY: train2.fm.file
train2.fm.file: $(TMP_ROOT)/train2 $(TMP_ROOT)/test2
	./csv2libfm_format.py $(TMP_ROOT)/train2.parts.0 0.2 1
	./csv2libfm_format.py $(TMP_ROOT)/test2 0.0 0
	./csv2libfm_format.py $(TMP_ROOT)/train2 0.3 1

# Run csv2libfm_format.py on the merged train3 (0.3 1) and test3 (0.0 0) files.
# Declared phony: no file named "train3.fm.file" is ever created.
.PHONY: train3.fm.file
train3.fm.file: $(TMP_ROOT)/train3 $(TMP_ROOT)/test3
	./csv2libfm_format.py $(TMP_ROOT)/train3 0.3 1
	./csv2libfm_format.py $(TMP_ROOT)/test3 0.0 0

# Run csv2libfm_format.py on the merged train4 (0 1) and test4 (0 0) files.
# Declared phony: no file named "train4.fm.file" is ever created.
.PHONY: train4.fm.file
train4.fm.file: $(TMP_ROOT)/train4 $(TMP_ROOT)/test4
	./csv2libfm_format.py $(TMP_ROOT)/train4 0 1
	./csv2libfm_format.py $(TMP_ROOT)/test4 0 0

# Run csv2libfm_format.py (0.2 1) on each of train4.parts.0 .. train4.parts.3.
# "|| exit 1" stops at the first failing part; without it the loop's status is
# that of the last iteration only, so earlier failures went unnoticed by make.
# $${no} is the shell loop variable ($$ escapes the dollar sign for make).
# NOTE(review): the part files are not declared prerequisites; they are
# presumably produced by the train4.parts rule, which has to be run first.
# Declared phony: no file named "train4.parts.fm.file" is ever created.
.PHONY: train4.parts.fm.file
train4.parts.fm.file:
	@for no in 0 1 2 3; do \
	./csv2libfm_format.py $(TMP_ROOT)/train4.parts.$${no} 0.2 1 || exit 1; \
	done


# Run csv2libfm_format.py on the merged train5 (0 1) and test5 (0 0) files.
# Declared phony: no file named "train5.fm.file" is ever created.
.PHONY: train5.fm.file
train5.fm.file: $(TMP_ROOT)/train5 $(TMP_ROOT)/test5
	./csv2libfm_format.py $(TMP_ROOT)/train5 0 1
	./csv2libfm_format.py $(TMP_ROOT)/test5 0 0

# Run csv2libfm_format.py on the merged train6 (0 1) and test6 (0 0) files.
# Declared phony: no file named "train6.fm.file" is ever created.
.PHONY: train6.fm.file
train6.fm.file: $(TMP_ROOT)/train6 $(TMP_ROOT)/test6
	./csv2libfm_format.py $(TMP_ROOT)/train6 0 1
	./csv2libfm_format.py $(TMP_ROOT)/test6 0 0
# Run csv2libfm_format.py on the merged train6 (0 1) and test6 (0 0) files.
# NOTE(review): prerequisites and recipe are identical to train6.fm.file;
# possibly a leftover copy -- confirm whether a different variant was intended.
# Declared phony: no file named "train6.1.fm.file" is ever created.
.PHONY: train6.1.fm.file
train6.1.fm.file: $(TMP_ROOT)/train6 $(TMP_ROOT)/test6
	./csv2libfm_format.py $(TMP_ROOT)/train6 0 1
	./csv2libfm_format.py $(TMP_ROOT)/test6 0 0

# Merge the existing fm features with the GBDT features.
# Each paste joins an fm file and its converted GBDT leaf-index file line by
# line, separated by a single space, into a *.with.gbdt_fea.fm file.
# NOTE(review): the third output is named test2.valid.with.gbdt_fea.fm although
# its inputs are the test2.train.* files -- confirm the name is intended.
# NOTE(review): the plain *.fm inputs are not declared prerequisites.
# Declared phony: no file named "train2.with.gbdt_fea" is ever created.
.PHONY: train2.with.gbdt_fea
train2.with.gbdt_fea: $(TMP_ROOT)/train2.train.fm.gbdt.leaf.index.fm $(TMP_ROOT)/train2.valid.fm.gbdt.leaf.index.fm $(TMP_ROOT)/test2.train.fm.gbdt.leaf.index.fm
	paste -d " " $(TMP_ROOT)/train2.train.fm $(TMP_ROOT)/train2.train.fm.gbdt.leaf.index.fm > $(TMP_ROOT)/train2.train.with.gbdt_fea.fm
	paste -d " " $(TMP_ROOT)/train2.valid.fm $(TMP_ROOT)/train2.valid.fm.gbdt.leaf.index.fm > $(TMP_ROOT)/train2.valid.with.gbdt_fea.fm
	paste -d " " $(TMP_ROOT)/test2.train.fm $(TMP_ROOT)/test2.train.fm.gbdt.leaf.index.fm > $(TMP_ROOT)/test2.valid.with.gbdt_fea.fm

# Passed as the second argument to leaf_index2fm.py by the leaf-index rules.
# NOTE(review): presumably the first feature index assigned to GBDT leaf
# features, i.e. tied to the number of existing train2 features -- confirm
# before changing the train2/test2 input lists.
start_index := 61
# GBDT leaf-index conversion: every X.fm.gbdt.leaf.index.fm listed here is
# produced from X.fm.gbdt.leaf.index by leaf_index2fm.py, which receives the
# input file, $(start_index) and the output file, in that order.
leaf_index_fm_files := \
  $(TMP_ROOT)/train2.train.fm.gbdt.leaf.index.fm \
  $(TMP_ROOT)/train2.valid.fm.gbdt.leaf.index.fm \
  $(TMP_ROOT)/test2.train.fm.gbdt.leaf.index.fm

# Static pattern rule: stripping the trailing ".fm" from a target yields its
# prerequisite, so one recipe replaces the three copy-pasted rules.
# $< is the leaf-index input, $@ the converted output.
$(leaf_index_fm_files): %.fm: %
	./leaf_index2fm.py $< $(start_index) $@

